View Javadoc

1   //
2   //  ========================================================================
3   //  Copyright (c) 1995-2016 Mort Bay Consulting Pty. Ltd.
4   //  ------------------------------------------------------------------------
5   //  All rights reserved. This program and the accompanying materials
6   //  are made available under the terms of the Eclipse Public License v1.0
7   //  and Apache License v2.0 which accompanies this distribution.
8   //
9   //      The Eclipse Public License is available at
10  //      http://www.eclipse.org/legal/epl-v10.html
11  //
12  //      The Apache License v2.0 is available at
13  //      http://www.opensource.org/licenses/apache2.0.php
14  //
15  //  You may elect to redistribute this code under either of these licenses.
16  //  ========================================================================
17  //
18  
19  package org.eclipse.jetty.util;
20  
21  import java.nio.charset.Charset;
22  import java.nio.charset.StandardCharsets;
23  
24  import org.eclipse.jetty.util.Utf8Appendable.NotUtf8Exception;
25  import org.eclipse.jetty.util.log.Log;
26  import org.eclipse.jetty.util.log.Logger;
27  
28  /** 
29   * URI Utility methods.
30   * <p>
31   * This class assists with the decoding and encoding of HTTP URIs.
32   * It differs from the java.net.URL class as it does not provide
33   * communications ability, but it does assist with query string
34   * formatting.
35   * </p>
36   * <p>
37   * UTF-8 encoding is used for % encoded characters (see {@link #__CHARSET}).
38   * Note that this class itself does not read the org.eclipse.jetty.util.URI.charset system property; the charset is a fixed constant here.
39   * </p>
40   * 
41   * @see UrlEncoded
42   */
43  public class URIUtil
44      implements Cloneable
45  {
        // Logger for this class.
46      private static final Logger LOG = Log.getLogger(URIUtil.class);
        // The path separator as a String.
47      public static final String SLASH="/";
        // Scheme names, with and without the trailing ':'.
48      public static final String HTTP="http";
49      public static final String HTTP_COLON="http:";
50      public static final String HTTPS="https";
51      public static final String HTTPS_COLON="https:";
52  
53      // Use UTF-8 as per http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars
54      public static final Charset __CHARSET=StandardCharsets.UTF_8 ;
55  
        // Private constructor: every member of this class is static, so no instance is ever needed.
56      private URIUtil()
57      {}
58      
59      /* ------------------------------------------------------------ */
60      /** Encode a URI path.
61       * This is the same encoding offered by URLEncoder, except that
62       * the '/' character is not encoded.
63       * @param path The path the encode
64       * @return The encoded path
65       */
66      public static String encodePath(String path)
67      {
68          if (path==null || path.length()==0)
69              return path;
70          
71          StringBuilder buf = encodePath(null,path);
72          return buf==null?path:buf.toString();
73      }
74          
75      /* ------------------------------------------------------------ */
76      /** Encode a URI path.
77       * @param path The path the encode
78       * @param buf StringBuilder to encode path into (or null)
79       * @return The StringBuilder or null if no substitutions required.
80       */
81      public static StringBuilder encodePath(StringBuilder buf, String path)
82      {
83          byte[] bytes=null;
84          if (buf==null)
85          {
86              loop: for (int i=0;i<path.length();i++)
87              {
88                  char c=path.charAt(i);
89                  switch(c)
90                  {
91                      case '%':
92                      case '?':
93                      case ';':
94                      case '#':
95                      case '\'':
96                      case '"':
97                      case '<':
98                      case '>':
99                      case ' ':
100                     case '[':
101                     case ']':
102                         buf=new StringBuilder(path.length()*2);
103                         break loop;
104                     default:
105                         if (c>127)
106                         {
107                             bytes=path.getBytes(URIUtil.__CHARSET);
108                             buf=new StringBuilder(path.length()*2);
109                             break loop;
110                         }
111                 }
112             }
113             if (buf==null)
114                 return null;
115         }
116 
117         if (bytes!=null)
118         {
119             for (int i=0;i<bytes.length;i++)
120             {
121                 byte c=bytes[i];       
122                 switch(c)
123                 {
124                     case '%':
125                         buf.append("%25");
126                         continue;
127                     case '?':
128                         buf.append("%3F");
129                         continue;
130                     case ';':
131                         buf.append("%3B");
132                         continue;
133                     case '#':
134                         buf.append("%23");
135                         continue;
136                     case '"':
137                         buf.append("%22");
138                         continue;
139                     case '\'':
140                         buf.append("%27");
141                         continue;
142                     case '<':
143                         buf.append("%3C");
144                         continue;
145                     case '>':
146                         buf.append("%3E");
147                         continue;
148                     case ' ':
149                         buf.append("%20");
150                         continue;
151                     case '[':
152                         buf.append("%5B");
153                         continue;
154                     case ']':
155                         buf.append("%5D");
156                         continue;
157                     default:
158                         if (c<0)
159                         {
160                             buf.append('%');
161                             TypeUtil.toHex(c,buf);
162                         }
163                         else
164                             buf.append((char)c);
165                         continue;
166                 }
167             }
168         }
169         else
170         {
171             for (int i=0;i<path.length();i++)
172             {
173                 char c=path.charAt(i);       
174                 switch(c)
175                 {
176                     case '%':
177                         buf.append("%25");
178                         continue;
179                     case '?':
180                         buf.append("%3F");
181                         continue;
182                     case ';':
183                         buf.append("%3B");
184                         continue;
185                     case '#':
186                         buf.append("%23");
187                         continue;
188                     case '"':
189                         buf.append("%22");
190                         continue;
191                     case '\'':
192                         buf.append("%27");
193                         continue;
194                     case '<':
195                         buf.append("%3C");
196                         continue;
197                     case '>':
198                         buf.append("%3E");
199                         continue;
200                     case ' ':
201                         buf.append("%20");
202                         continue;
203                     case '[':
204                         buf.append("%5B");
205                         continue;
206                     case ']':
207                         buf.append("%5D");
208                         continue;
209                     default:
210                         buf.append(c);
211                         continue;
212                 }
213             }
214         }
215 
216 
217         return buf;
218     }
219     
220     /* ------------------------------------------------------------ */
221     /** Encode a URI path.
222      * @param path The path the encode
223      * @param buf StringBuilder to encode path into (or null)
224      * @param encode String of characters to encode. % is always encoded.
225      * @return The StringBuilder or null if no substitutions required.
226      */
227     public static StringBuilder encodeString(StringBuilder buf,
228                                              String path,
229                                              String encode)
230     {
231         if (buf==null)
232         {
233         loop:
234             for (int i=0;i<path.length();i++)
235             {
236                 char c=path.charAt(i);
237                 if (c=='%' || encode.indexOf(c)>=0)
238                 {    
239                     buf=new StringBuilder(path.length()<<1);
240                     break loop;
241                 }
242             }
243             if (buf==null)
244                 return null;
245         }
246         
247         synchronized(buf)
248         {
249             for (int i=0;i<path.length();i++)
250             {
251                 char c=path.charAt(i);
252                 if (c=='%' || encode.indexOf(c)>=0)
253                 {
254                     buf.append('%');
255                     StringUtil.append(buf,(byte)(0xff&c),16);
256                 }
257                 else
258                     buf.append(c);
259             }
260         }
261 
262         return buf;
263     }
264     
265     /* ------------------------------------------------------------ */
266     /* Decode a URI path and strip parameters
267      */
268     public static String decodePath(String path)
269     {
270         return decodePath(path,0,path.length());
271     }
272 
273     /* ------------------------------------------------------------ */
274     /* Decode a URI path and strip parameters of UTF-8 path
275      */
276     public static String decodePath(String path, int offset, int length)
277     {
278         try
279         {
280             Utf8StringBuilder builder=null;
281             int end=offset+length;
282             for (int i=offset;i<end;i++)
283             {
284                 char c = path.charAt(i);
285                 switch(c)
286                 {
287                     case '%':
288                         if (builder==null)
289                         {
290                             builder=new Utf8StringBuilder(path.length());
291                             builder.append(path,offset,i-offset);
292                         }
293                         if ((i+2)<end)
294                         {
295                             char u=path.charAt(i+1);
296                             if (u=='u')
297                             {
298                                 // TODO this is wrong. This is a codepoint not a char
299                                 builder.append((char)(0xffff&TypeUtil.parseInt(path,i+2,4,16)));
300                                 i+=5;
301                             }
302                             else
303                             {
304                                 builder.append((byte)(0xff&(TypeUtil.convertHexDigit(u)*16+TypeUtil.convertHexDigit(path.charAt(i+2)))));
305                                 i+=2;
306                             }
307                         }
308                         else
309                         {
310                             throw new IllegalArgumentException("Bad URI % encoding");
311                         }
312 
313                         break;
314 
315                     case ';':
316                         if (builder==null)
317                         {
318                             builder=new Utf8StringBuilder(path.length());
319                             builder.append(path,offset,i-offset);
320                         }
321                         
322                         while(++i<end)
323                         {
324                             if (path.charAt(i)=='/')
325                             {
326                                 builder.append('/');
327                                 break;
328                             }
329                         }
330                         
331                         break;
332 
333                     default:
334                         if (builder!=null)
335                             builder.append(c);
336                         break;
337                 }
338             }
339 
340             if (builder!=null)
341                 return builder.toString();
342             if (offset==0 && length==path.length())
343                 return path;
344             return path.substring(offset,end);   
345         }
346         catch(NotUtf8Exception e)
347         {
348             LOG.warn(path.substring(offset,offset+length)+" "+e);
349             LOG.debug(e);
350             return decodeISO88591Path(path,offset,length);
351         }
352     }
353 
354     
355     /* ------------------------------------------------------------ */
356     /* Decode a URI path and strip parameters of ISO-8859-1 path
357      */
358     private static String decodeISO88591Path(String path, int offset, int length)
359     {
360         StringBuilder builder=null;
361         int end=offset+length;
362         for (int i=offset;i<end;i++)
363         {
364             char c = path.charAt(i);
365             switch(c)
366             {
367                 case '%':
368                     if (builder==null)
369                     {
370                         builder=new StringBuilder(path.length());
371                         builder.append(path,offset,i-offset);
372                     }
373                     if ((i+2)<end)
374                     {
375                         char u=path.charAt(i+1);
376                         if (u=='u')
377                         {
378                             // TODO this is wrong. This is a codepoint not a char
379                             builder.append((char)(0xffff&TypeUtil.parseInt(path,i+2,4,16)));
380                             i+=5;
381                         }
382                         else
383                         {
384                             builder.append((byte)(0xff&(TypeUtil.convertHexDigit(u)*16+TypeUtil.convertHexDigit(path.charAt(i+2)))));
385                             i+=2;
386                         }
387                     }
388                     else
389                     {
390                         throw new IllegalArgumentException();
391                     }
392                     
393                     break;
394                     
395                 case ';':
396                     if (builder==null)
397                     {
398                         builder=new StringBuilder(path.length());
399                         builder.append(path,offset,i-offset);
400                     }
401                     while(++i<end)
402                     {
403                         if (path.charAt(i)=='/')
404                         {
405                             builder.append('/');
406                             break;
407                         }
408                     }
409                     break;
410                     
411                     
412                 default:
413                     if (builder!=null)
414                         builder.append(c);
415                     break;
416             }
417         }
418 
419         if (builder!=null)
420             return builder.toString();
421         if (offset==0 && length==path.length())
422             return path;
423         return path.substring(offset,end);        
424     }
425 
426     
427     /* ------------------------------------------------------------ */
428     /** Add two URI path segments.
429      * Handles null and empty paths, path and query params (eg ?a=b or
430      * ;JSESSIONID=xxx) and avoids duplicate '/'
431      * @param p1 URI path segment (should be encoded)
432      * @param p2 URI path segment (should be encoded)
433      * @return Legally combined path segments.
434      */
435     public static String addPaths(String p1, String p2)
436     {
437         if (p1==null || p1.length()==0)
438         {
439             if (p1!=null && p2==null)
440                 return p1;
441             return p2;
442         }
443         if (p2==null || p2.length()==0)
444             return p1;
445         
446         int split=p1.indexOf(';');
447         if (split<0)
448             split=p1.indexOf('?');
449         if (split==0)
450             return p2+p1;
451         if (split<0)
452             split=p1.length();
453 
454         StringBuilder buf = new StringBuilder(p1.length()+p2.length()+2);
455         buf.append(p1);
456         
457         if (buf.charAt(split-1)=='/')
458         {
459             if (p2.startsWith(URIUtil.SLASH))
460             {
461                 buf.deleteCharAt(split-1);
462                 buf.insert(split-1,p2);
463             }
464             else
465                 buf.insert(split,p2);
466         }
467         else
468         {
469             if (p2.startsWith(URIUtil.SLASH))
470                 buf.insert(split,p2);
471             else
472             {
473                 buf.insert(split,'/');
474                 buf.insert(split+1,p2);
475             }
476         }
477 
478         return buf.toString();
479     }
480     
481     /* ------------------------------------------------------------ */
482     /** Return the parent Path.
483      * Treat a URI like a directory path and return the parent directory.
484      * @param p the path to return a parent reference to
485      * @return the parent path of the URI
486      */
487     public static String parentPath(String p)
488     {
489         if (p==null || URIUtil.SLASH.equals(p))
490             return null;
491         int slash=p.lastIndexOf('/',p.length()-2);
492         if (slash>=0)
493             return p.substring(0,slash+1);
494         return null;
495     }
496     
497     /* ------------------------------------------------------------ */
498     /** Convert a path to a canonical form.
499      * All instances of "." and ".." are factored out.  Null is returned
500      * if the path tries to .. above its root.
         * A null or empty path is returned as is.
501      * @param path the path to convert
502      * @return path or null.
503      */
504     public static String canonicalPath(String path)
505     {
506         if (path==null || path.length()==0)
507             return path;
508 
            // First pass (read only): walk the segments from the end of the path
            // towards the front, looking for a segment that is exactly "." or "..".
            // The current segment is the characters after index 'start' (the '/'
            // before it, or -1) up to but excluding index 'end'.
509         int end=path.length();
510         int start = path.lastIndexOf('/', end);
511 
512     search:
513         while (end>0)
514         {
515             switch(end-start) // segment length + 1
516             {
517               case 2: // possible single dot
518                   if (path.charAt(start+1)!='.')
519                       break;
520                   break search;
521               case 3: // possible double dot
522                   if (path.charAt(start+1)!='.' || path.charAt(start+2)!='.')
523                       break;
524                   break search;
525             }
526             
527             end=start;
528             start=path.lastIndexOf('/',end-1);
529         }
530 
531         // If we have checked the entire string
            // NOTE(review): for a path starting with '/' the scan above finishes with
            // start==-1 and end==0, so this test is false; the code below then does
            // no work and returns an equal copy rather than the same instance.
532         if (start>=end)
533             return path;
534         
            // Second pass: continue from the segment found above, editing a copy.
            // delStart/delEnd hold the range of buf pending deletion (-1 when unset);
            // skip counts ".." segments not yet paired with a preceding segment.
535         StringBuilder buf = new StringBuilder(path);
536         int delStart=-1;
537         int delEnd=-1;
538         int skip=0;
539         
540         while (end>0)
541         {
542             switch(end-start)
543             {       
544               case 2: // possible single dot
545                   if (buf.charAt(start+1)!='.')
546                   {
                          // ordinary one character segment: it cancels one pending ".."
547                       if (skip>0 && --skip==0)
548                       {   
549                           delStart=start>=0?start:0;
550                           if(delStart>0 && delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
551                               delStart++;
552                       }
553                       break;
554                   }
555                   
                      // a leading "." directly followed by "//" is left in place
556                   if(start<0 && buf.length()>2 && buf.charAt(1)=='/' && buf.charAt(2)=='/')
557                       break;
558                   
559                   if(delEnd<0)
560                       delEnd=end;
561                   delStart=start;
562                   if (delStart<0 || delStart==0&&buf.charAt(delStart)=='/')
563                   {
564                       delStart++;
565                       if (delEnd<buf.length() && buf.charAt(delEnd)=='/')
566                           delEnd++;
567                       break;
568                   }
569                   if (end==buf.length())
570                       delStart++;
571                   
                      // step back to the previous segment without deleting yet
572                   end=start--;
573                   while (start>=0 && buf.charAt(start)!='/')
574                       start--;
575                   continue;
576                   
577               case 3: // possible double dot
578                   if (buf.charAt(start+1)!='.' || buf.charAt(start+2)!='.')
579                   {
                          // ordinary two character segment: it cancels one pending ".."
580                       if (skip>0 && --skip==0)
581                       {   delStart=start>=0?start:0;
582                           if(delStart>0 && delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
583                               delStart++;
584                       }
585                       break;
586                   }
587                   
588                   delStart=start;
589                   if (delEnd<0)
590                       delEnd=end;
591 
                      // ".." found: one more preceding segment has to be removed
592                   skip++;
593                   end=start--;
594                   while (start>=0 && buf.charAt(start)!='/')
595                       start--;
596                   continue;
597 
598               default:
                      // any other segment: it cancels one pending ".."
599                   if (skip>0 && --skip==0)
600                   {
601                       delStart=start>=0?start:0;
602                       if(delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
603                           delStart++;
604                   }
605             }     
606             
607             // Do the delete
608             if (skip<=0 && delStart>=0 && delEnd>=delStart)
609             {  
610                 buf.delete(delStart,delEnd);
611                 delStart=delEnd=-1;
                    // NOTE(review): skip<=0 is required to get here, so this test cannot be true
612                 if (skip>0)
613                     delEnd=end;
614             }
615             
                // step back to the previous segment
616             end=start--;
617             while (start>=0 && buf.charAt(start)!='/')
618                 start--;
619         }      
620 
621         // Too many ..
622         if (skip>0)
623             return null;
624         
625         // Do the delete
626         if (delEnd>=0)
627             buf.delete(delStart,delEnd);
628 
629         return buf.toString();
630     }
631 
632     /* ------------------------------------------------------------ */
633     /** Convert a path to a compact form.
634      * All instances of "//" and "///" etc. are factored out to single "/" 
635      * @param path the path to compact 
636      * @return the compacted path 
637      */
638     public static String compactPath(String path)
639     {
640         if (path==null || path.length()==0)
641             return path;
642 
643         int state=0;
644         int end=path.length();
645         int i=0;
646         
647         loop:
648         while (i<end)
649         {
650             char c=path.charAt(i);
651             switch(c)
652             {
653                 case '?':
654                     return path;
655                 case '/':
656                     state++;
657                     if (state==2)
658                         break loop;
659                     break;
660                 default:
661                     state=0;
662             }
663             i++;
664         }
665         
666         if (state<2)
667             return path;
668         
669         StringBuffer buf = new StringBuffer(path.length());
670         buf.append(path,0,i);
671         
672         loop2:
673         while (i<end)
674         {
675             char c=path.charAt(i);
676             switch(c)
677             {
678                 case '?':
679                     buf.append(path,i,end);
680                     break loop2;
681                 case '/':
682                     if (state++==0)
683                         buf.append(c);
684                     break;
685                 default:
686                     state=0;
687                     buf.append(c);
688             }
689             i++;
690         }
691         
692         return buf.toString();
693     }
694 
695     /* ------------------------------------------------------------ */
696     /** 
697      * @param uri URI
698      * @return True if the uri has a scheme
699      */
700     public static boolean hasScheme(String uri)
701     {
702         for (int i=0;i<uri.length();i++)
703         {
704             char c=uri.charAt(i);
705             if (c==':')
706                 return true;
707             if (!(c>='a'&&c<='z' ||
708                   c>='A'&&c<='Z' ||
709                   (i>0 &&(c>='0'&&c<='9' ||
710                           c=='.' ||
711                           c=='+' ||
712                           c=='-'))
713                   ))
714                 break;
715         }
716         return false;
717     }
718 
719     /* ------------------------------------------------------------ */
720     /**
721      * Create a new URI from the arguments, handling IPv6 host encoding and default ports
722      * @param scheme the URI scheme
723      * @param server the URI server
724      * @param port the URI port
725      * @param path the URI path
726      * @param query the URI query
727      * @return A String URI
728      */
729     public static String newURI(String scheme,String server, int port,String path,String query)
730     {
731         StringBuilder builder = newURIBuilder(scheme, server, port);
732         builder.append(path);
733         if (query!=null && query.length()>0)
734             builder.append('?').append(query);
735         return builder.toString();
736     }
737     
738     /* ------------------------------------------------------------ */
739     /**
740      * Create a new URI StringBuilder from the arguments, handling IPv6 host encoding and default ports
741      * @param scheme the URI scheme
742      * @param server the URI server
743      * @param port the URI port
744      * @return a StringBuilder containing URI prefix
745      */
746     public static StringBuilder newURIBuilder(String scheme,String server, int port)
747     {
748         StringBuilder builder = new StringBuilder();
749         appendSchemeHostPort(builder, scheme, server, port);
750         return builder;
751     }
752 
753     /* ------------------------------------------------------------ */
754     /** 
755      * Append scheme, host and port URI prefix, handling IPv6 address encoding and default ports
756      * @param url StringBuilder to append to
757      * @param scheme the URI scheme
758      * @param server the URI server
759      * @param port the URI port
760      */
761     public static void appendSchemeHostPort(StringBuilder url,String scheme,String server, int port)
762     {
763         if (server.indexOf(':')>=0&&server.charAt(0)!='[')
764             url.append(scheme).append("://").append('[').append(server).append(']');
765         else
766             url.append(scheme).append("://").append(server);
767 
768         if (port > 0)
769         {
770             switch(scheme)
771             {
772                 case "http":
773                     if (port!=80) 
774                         url.append(':').append(port);
775                     break;
776                     
777                 case "https":
778                     if (port!=443) 
779                         url.append(':').append(port);
780                     break;
781 
782                 default:
783                     url.append(':').append(port);
784             }
785         }
786     }
787     
788     /* ------------------------------------------------------------ */
789     /** 
790      * Append scheme, host and port URI prefix, handling IPv6 address encoding and default ports
791      * @param url StringBuffer to append to
792      * @param scheme the URI scheme
793      * @param server the URI server
794      * @param port the URI port
795      */
796     public static void appendSchemeHostPort(StringBuffer url,String scheme,String server, int port)
797     {
798         synchronized (url)
799         {
800             if (server.indexOf(':')>=0&&server.charAt(0)!='[')
801                 url.append(scheme).append("://").append('[').append(server).append(']');
802             else
803                 url.append(scheme).append("://").append(server);
804 
805             if (port > 0)
806             {
807                 switch(scheme)
808                 {
809                     case "http":
810                         if (port!=80) 
811                             url.append(':').append(port);
812                         break;
813                         
814                     case "https":
815                         if (port!=443) 
816                             url.append(':').append(port);
817                         break;
818 
819                     default:
820                         url.append(':').append(port);
821                 }
822             }
823         }
824     }
825 
826     public static boolean equalsIgnoreEncodings(String uriA, String uriB)
827     {
828         int lenA=uriA.length();
829         int lenB=uriB.length();
830         int a=0;
831         int b=0;
832         
833         while (a<lenA && b<lenB)
834         {
835             int oa=uriA.charAt(a++);
836             int ca=oa;
837             if (ca=='%')
838                 ca=TypeUtil.convertHexDigit(uriA.charAt(a++))*16+TypeUtil.convertHexDigit(uriA.charAt(a++));
839             
840             int ob=uriB.charAt(b++);
841             int cb=ob;
842             if (cb=='%')
843                 cb=TypeUtil.convertHexDigit(uriB.charAt(b++))*16+TypeUtil.convertHexDigit(uriB.charAt(b++));
844             
845             if (ca=='/' && oa!=ob)
846                 return false;
847             
848             if (ca!=cb )
849                 return URIUtil.decodePath(uriA).equals(URIUtil.decodePath(uriB));
850         }
851         return a==lenA && b==lenB;
852     }
853 }
854 
855 
856