View Javadoc

1   //
2   //  ========================================================================
3   //  Copyright (c) 1995-2013 Mort Bay Consulting Pty. Ltd.
4   //  ------------------------------------------------------------------------
5   //  All rights reserved. This program and the accompanying materials
6   //  are made available under the terms of the Eclipse Public License v1.0
7   //  and Apache License v2.0 which accompanies this distribution.
8   //
9   //      The Eclipse Public License is available at
10  //      http://www.eclipse.org/legal/epl-v10.html
11  //
12  //      The Apache License v2.0 is available at
13  //      http://www.opensource.org/licenses/apache2.0.php
14  //
15  //  You may elect to redistribute this code under either of these licenses.
16  //  ========================================================================
17  //
18  
19  package org.eclipse.jetty.util;
20  
21  import java.io.UnsupportedEncodingException;
22  import java.net.URI;
23  import java.net.URLEncoder;
24  
25  import org.eclipse.jetty.util.log.Log;
26  
27  
28  
29  /* ------------------------------------------------------------ */
30  /** URI Holder.
31   * This class assists with the decoding and encoding of HTTP URIs.
32   * It differs from the java.net.URL class as it does not provide
33   * communications ability, but it does assist with query string
34   * formatting.
35   * <P>UTF-8 encoding is used by default for % encoded characters. This
36   * may be overridden with the org.eclipse.jetty.util.URI.charset system property.
37   * @see UrlEncoded
38   * 
39   */
40  public class URIUtil
41      implements Cloneable
42  {
43      public static final String SLASH="/";
44      public static final String HTTP="http";
45      public static final String HTTP_COLON="http:";
46      public static final String HTTPS="https";
47      public static final String HTTPS_COLON="https:";
48  
49      // Use UTF-8 as per http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars
50      public static final String __CHARSET=System.getProperty("org.eclipse.jetty.util.URI.charset",StringUtil.__UTF8);
51      
52      private URIUtil()
53      {}
54      
55      /* ------------------------------------------------------------ */
56      /** Encode a URI path.
57       * This is the same encoding offered by URLEncoder, except that
58       * the '/' character is not encoded.
59       * @param path The path the encode
60       * @return The encoded path
61       */
62      public static String encodePath(String path)
63      {
64          if (path==null || path.length()==0)
65              return path;
66          
67          StringBuilder buf = encodePath(null,path);
68          return buf==null?path:buf.toString();
69      }
70          
71      /* ------------------------------------------------------------ */
72      /** Encode a URI path.
73       * @param path The path the encode
74       * @param buf StringBuilder to encode path into (or null)
75       * @return The StringBuilder or null if no substitutions required.
76       */
77      public static StringBuilder encodePath(StringBuilder buf, String path)
78      {
79          byte[] bytes=null;
80          if (buf==null)
81          {
82          loop:
83              for (int i=0;i<path.length();i++)
84              {
85                  char c=path.charAt(i);
86                  switch(c)
87                  {
88                      case '%':
89                      case '?':
90                      case ';':
91                      case '#':
92                      case '\'':
93                      case '"':
94                      case '<':
95                      case '>':
96                      case ' ':
97                          buf=new StringBuilder(path.length()*2);
98                          break loop;
99                      default:
100                         if (c>127)
101                         {
102                             try
103                             {
104                                 bytes=path.getBytes(URIUtil.__CHARSET);
105                             }
106                             catch (UnsupportedEncodingException e)
107                             {
108                                 throw new IllegalStateException(e);
109                             }
110                             buf=new StringBuilder(path.length()*2);
111                             break loop;
112                         }
113                        
114                 }
115             }
116             if (buf==null)
117                 return null;
118         }
119         
120         synchronized(buf)
121         {
122             if (bytes!=null)
123             {
124                 for (int i=0;i<bytes.length;i++)
125                 {
126                     byte c=bytes[i];       
127                     switch(c)
128                     {
129                       case '%':
130                           buf.append("%25");
131                           continue;
132                       case '?':
133                           buf.append("%3F");
134                           continue;
135                       case ';':
136                           buf.append("%3B");
137                           continue;
138                       case '#':
139                           buf.append("%23");
140                           continue;
141                       case '"':
142                           buf.append("%22");
143                           continue;
144                       case '\'':
145                           buf.append("%27");
146                           continue;
147                       case '<':
148                           buf.append("%3C");
149                           continue;
150                       case '>':
151                           buf.append("%3E");
152                           continue;
153                       case ' ':
154                           buf.append("%20");
155                           continue;
156                       default:
157                           if (c<0)
158                           {
159                               buf.append('%');
160                               TypeUtil.toHex(c,buf);
161                           }
162                           else
163                               buf.append((char)c);
164                           continue;
165                     }
166                 }
167                 
168             }
169             else
170             {
171                 for (int i=0;i<path.length();i++)
172                 {
173                     char c=path.charAt(i);       
174                     switch(c)
175                     {
176                         case '%':
177                             buf.append("%25");
178                             continue;
179                         case '?':
180                             buf.append("%3F");
181                             continue;
182                         case ';':
183                             buf.append("%3B");
184                             continue;
185                         case '#':
186                             buf.append("%23");
187                             continue;
188                         case '"':
189                             buf.append("%22");
190                             continue;
191                         case '\'':
192                             buf.append("%27");
193                             continue;
194                         case '<':
195                             buf.append("%3C");
196                             continue;
197                         case '>':
198                             buf.append("%3E");
199                             continue;
200                         case ' ':
201                             buf.append("%20");
202                             continue;
203                         default:
204                             buf.append(c);
205                             continue;
206                     }
207                 }
208             }
209         }
210 
211         return buf;
212     }
213     
214     /* ------------------------------------------------------------ */
215     /** Encode a URI path.
216      * @param path The path the encode
217      * @param buf StringBuilder to encode path into (or null)
218      * @param encode String of characters to encode. % is always encoded.
219      * @return The StringBuilder or null if no substitutions required.
220      */
221     public static StringBuilder encodeString(StringBuilder buf,
222                                              String path,
223                                              String encode)
224     {
225         if (buf==null)
226         {
227         loop:
228             for (int i=0;i<path.length();i++)
229             {
230                 char c=path.charAt(i);
231                 if (c=='%' || encode.indexOf(c)>=0)
232                 {    
233                     buf=new StringBuilder(path.length()<<1);
234                     break loop;
235                 }
236             }
237             if (buf==null)
238                 return null;
239         }
240         
241         synchronized(buf)
242         {
243             for (int i=0;i<path.length();i++)
244             {
245                 char c=path.charAt(i);
246                 if (c=='%' || encode.indexOf(c)>=0)
247                 {
248                     buf.append('%');
249                     StringUtil.append(buf,(byte)(0xff&c),16);
250                 }
251                 else
252                     buf.append(c);
253             }
254         }
255 
256         return buf;
257     }
258     
259     /* ------------------------------------------------------------ */
260     /* Decode a URI path and strip parameters
261      * @param path The path the encode
262      * @param buf StringBuilder to encode path into
263      */
264     public static String decodePath(String path)
265     {
266         if (path==null)
267             return null;
268         // Array to hold all converted characters
269         char[] chars=null;
270         int n=0;
271         // Array to hold a sequence of %encodings
272         byte[] bytes=null;
273         int b=0;
274         
275         int len=path.length();
276         
277         for (int i=0;i<len;i++)
278         {
279             char c = path.charAt(i);
280 
281             if (c=='%' && (i+2)<len)
282             {
283                 if (chars==null)
284                 {
285                     chars=new char[len];
286                     bytes=new byte[len];
287                     path.getChars(0,i,chars,0);
288                 }
289                 bytes[b++]=(byte)(0xff&TypeUtil.parseInt(path,i+1,2,16));
290                 i+=2;
291                 continue;
292             }
293             else if (c==';')
294             {
295                 if (chars==null)
296                 {
297                     chars=new char[len];
298                     path.getChars(0,i,chars,0);
299                     n=i;
300                 }
301                 break;
302             }
303             else if (bytes==null)
304             {
305                 n++;
306                 continue;
307             }
308             
309             // Do we have some bytes to convert?
310             if (b>0)
311             {
312                 // convert series of bytes and add to chars
313                 String s;
314                 try
315                 {
316                     s=new String(bytes,0,b,__CHARSET);
317                 }
318                 catch (UnsupportedEncodingException e)
319                 {       
320                     s=new String(bytes,0,b);
321                 }
322                 s.getChars(0,s.length(),chars,n);
323                 n+=s.length();
324                 b=0;
325             }
326             
327             chars[n++]=c;
328         }
329 
330         if (chars==null)
331             return path;
332 
333         // if we have a remaining sequence of bytes
334         if (b>0)
335         {
336             // convert series of bytes and add to chars
337             String s;
338             try
339             {
340                 s=new String(bytes,0,b,__CHARSET);
341             }
342             catch (UnsupportedEncodingException e)
343             {       
344                 s=new String(bytes,0,b);
345             }
346             s.getChars(0,s.length(),chars,n);
347             n+=s.length();
348         }
349         
350         return new String(chars,0,n);
351     }
352     
353     /* ------------------------------------------------------------ */
354     /* Decode a URI path and strip parameters.
355      * @param path The path the encode
356      * @param buf StringBuilder to encode path into
357      */
358     public static String decodePath(byte[] buf, int offset, int length)
359     {
360         byte[] bytes=null;
361         int n=0;
362         
363         for (int i=0;i<length;i++)
364         {
365             byte b = buf[i + offset];
366             
367             if (b=='%' && (i+2)<length)
368             {
369                 b=(byte)(0xff&TypeUtil.parseInt(buf,i+offset+1,2,16));
370                 i+=2;
371             }
372             else if (b==';')
373             {
374                 length=i;
375                 break;
376             }
377             else if (bytes==null)
378             {
379                 n++;
380                 continue;
381             }
382             
383             if (bytes==null)
384             {
385                 bytes=new byte[length];
386                 for (int j=0;j<n;j++)
387                     bytes[j]=buf[j + offset];
388             }
389             
390             bytes[n++]=b;
391         }
392 
393         if (bytes==null)
394             return StringUtil.toString(buf,offset,length,__CHARSET);
395         return StringUtil.toString(bytes,0,n,__CHARSET);
396     }
397 
398     
399     /* ------------------------------------------------------------ */
400     /** Add two URI path segments.
401      * Handles null and empty paths, path and query params (eg ?a=b or
402      * ;JSESSIONID=xxx) and avoids duplicate '/'
403      * @param p1 URI path segment (should be encoded)
404      * @param p2 URI path segment (should be encoded)
405      * @return Legally combined path segments.
406      */
407     public static String addPaths(String p1, String p2)
408     {
409         if (p1==null || p1.length()==0)
410         {
411             if (p1!=null && p2==null)
412                 return p1;
413             return p2;
414         }
415         if (p2==null || p2.length()==0)
416             return p1;
417         
418         int split=p1.indexOf(';');
419         if (split<0)
420             split=p1.indexOf('?');
421         if (split==0)
422             return p2+p1;
423         if (split<0)
424             split=p1.length();
425 
426         StringBuilder buf = new StringBuilder(p1.length()+p2.length()+2);
427         buf.append(p1);
428         
429         if (buf.charAt(split-1)=='/')
430         {
431             if (p2.startsWith(URIUtil.SLASH))
432             {
433                 buf.deleteCharAt(split-1);
434                 buf.insert(split-1,p2);
435             }
436             else
437                 buf.insert(split,p2);
438         }
439         else
440         {
441             if (p2.startsWith(URIUtil.SLASH))
442                 buf.insert(split,p2);
443             else
444             {
445                 buf.insert(split,'/');
446                 buf.insert(split+1,p2);
447             }
448         }
449 
450         return buf.toString();
451     }
452     
453     /* ------------------------------------------------------------ */
454     /** Return the parent Path.
455      * Treat a URI like a directory path and return the parent directory.
456      */
457     public static String parentPath(String p)
458     {
459         if (p==null || URIUtil.SLASH.equals(p))
460             return null;
461         int slash=p.lastIndexOf('/',p.length()-2);
462         if (slash>=0)
463             return p.substring(0,slash+1);
464         return null;
465     }
466     
467     /* ------------------------------------------------------------ */
468     /** Convert a path to a canonical form.
469      * All instances of "." and ".." are factored out.  Null is returned
470      * if the path tries to .. above its root.
         * <p>The path is examined one '/'-delimited segment at a time, working
         * from the end of the string back towards its start.
471      * @param path the path to canonicalize; a null or empty path is returned as is
472      * @return path or null. Null is returned when ".." segments are still
         *         unmatched once the start of the path has been reached.
473      */
474     public static String canonicalPath(String path)
475     {
476         if (path==null || path.length()==0)
477             return path;
478 
            // The segment under inspection lies between start and end: start is
            // the index of the '/' in front of it (-1 if there is none) and end
            // is the index just past its last character.
479         int end=path.length();
480         int start = path.lastIndexOf('/', end);
481 
            // First pass (read-only): walk the segments from right to left and
            // stop at the first segment that is exactly "." or "..".
482     search:
483         while (end>0)
484         {
485             switch(end-start)
486             {
487               case 2: // possible single dot
488                   if (path.charAt(start+1)!='.')
489                       break;
490                   break search;
491               case 3: // possible double dot
492                   if (path.charAt(start+1)!='.' || path.charAt(start+2)!='.')
493                       break;
494                   break search;
495             }
496             
497             end=start;
498             start=path.lastIndexOf('/',end-1);
499         }
500 
501         // If we have checked the entire string
502         if (start>=end)
503             return path;
504         
            // Second pass: edit a mutable copy, continuing right to left from the
            // segment found above. skip counts ".." segments that have not yet
            // been matched by an ordinary segment to their left; delStart and
            // delEnd delimit the range of buf waiting to be deleted (-1 = unset).
505         StringBuilder buf = new StringBuilder(path);
506         int delStart=-1;
507         int delEnd=-1;
508         int skip=0;
509         
510         while (end>0)
511         {
512             switch(end-start)
513             {       
514               case 2: // possible single dot
515                   if (buf.charAt(start+1)!='.')
516                   {
                          // Ordinary one-character segment: it cancels one pending "..".
517                       if (skip>0 && --skip==0)
518                       {   
519                           delStart=start>=0?start:0;
520                           if(delStart>0 && delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
521                               delStart++;
522                       }
523                       break;
524                   }
525                   
                      // A leading "." that is immediately followed by "//" is left in place.
526                   if(start<0 && buf.length()>2 && buf.charAt(1)=='/' && buf.charAt(2)=='/')
527                       break;
528                   
529                   if(delEnd<0)
530                       delEnd=end;
531                   delStart=start;
532                   if (delStart<0 || delStart==0&&buf.charAt(delStart)=='/')
533                   {
534                       delStart++;
535                       if (delEnd<buf.length() && buf.charAt(delEnd)=='/')
536                           delEnd++;
537                       break;
538                   }
539                   if (end==buf.length())
540                       delStart++;
541                   
                      // Step to the previous segment without deleting yet.
542                   end=start--;
543                   while (start>=0 && buf.charAt(start)!='/')
544                       start--;
545                   continue;
546                   
547               case 3: // possible double dot
548                   if (buf.charAt(start+1)!='.' || buf.charAt(start+2)!='.')
549                   {
                          // Ordinary two-character segment: it cancels one pending "..".
550                       if (skip>0 && --skip==0)
551                       {   delStart=start>=0?start:0;
552                           if(delStart>0 && delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
553                               delStart++;
554                       }
555                       break;
556                   }
557                   
558                   delStart=start;
559                   if (delEnd<0)
560                       delEnd=end;
561 
                      // ".." found: one more segment to the left has to be removed.
562                   skip++;
563                   end=start--;
564                   while (start>=0 && buf.charAt(start)!='/')
565                       start--;
566                   continue;
567 
568               default:
                      // Any other segment length: an ordinary segment.
569                   if (skip>0 && --skip==0)
570                   {
571                       delStart=start>=0?start:0;
572                       if(delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
573                           delStart++;
574                   }
575             }     
576             
577             // Do the delete
578             if (skip<=0 && delStart>=0 && delEnd>=delStart)
579             {  
580                 buf.delete(delStart,delEnd);
581                 delStart=delEnd=-1;
582                 if (skip>0)
583                     delEnd=end;
584             }
585             
                // Step to the previous segment.
586             end=start--;
587             while (start>=0 && buf.charAt(start)!='/')
588                 start--;
589         }      
590 
591         // Too many ..
592         if (skip>0)
593             return null;
594         
595         // Do the delete
596         if (delEnd>=0)
597             buf.delete(delStart,delEnd);
598 
599         return buf.toString();
600     }
601 
602     /* ------------------------------------------------------------ */
603     /** Convert a path to a compact form.
604      * All instances of "//" and "///" etc. are factored out to single "/" 
605      * @param path 
606      * @return path
607      */
608     public static String compactPath(String path)
609     {
610         if (path==null || path.length()==0)
611             return path;
612 
613         int state=0;
614         int end=path.length();
615         int i=0;
616         
617         loop:
618         while (i<end)
619         {
620             char c=path.charAt(i);
621             switch(c)
622             {
623                 case '?':
624                     return path;
625                 case '/':
626                     state++;
627                     if (state==2)
628                         break loop;
629                     break;
630                 default:
631                     state=0;
632             }
633             i++;
634         }
635         
636         if (state<2)
637             return path;
638         
639         StringBuffer buf = new StringBuffer(path.length());
640         buf.append(path,0,i);
641         
642         loop2:
643         while (i<end)
644         {
645             char c=path.charAt(i);
646             switch(c)
647             {
648                 case '?':
649                     buf.append(path,i,end);
650                     break loop2;
651                 case '/':
652                     if (state++==0)
653                         buf.append(c);
654                     break;
655                 default:
656                     state=0;
657                     buf.append(c);
658             }
659             i++;
660         }
661         
662         return buf.toString();
663     }
664 
665     /* ------------------------------------------------------------ */
666     /** 
667      * @param uri URI
668      * @return True if the uri has a scheme
669      */
670     public static boolean hasScheme(String uri)
671     {
672         for (int i=0;i<uri.length();i++)
673         {
674             char c=uri.charAt(i);
675             if (c==':')
676                 return true;
677             if (!(c>='a'&&c<='z' ||
678                   c>='A'&&c<='Z' ||
679                   (i>0 &&(c>='0'&&c<='9' ||
680                           c=='.' ||
681                           c=='+' ||
682                           c=='-'))
683                   ))
684                 break;
685         }
686         return false;
687     }
688     
689 }
690 
691 
692