View Javadoc

1   //
2   //  ========================================================================
3   //  Copyright (c) 1995-2013 Mort Bay Consulting Pty. Ltd.
4   //  ------------------------------------------------------------------------
5   //  All rights reserved. This program and the accompanying materials
6   //  are made available under the terms of the Eclipse Public License v1.0
7   //  and Apache License v2.0 which accompanies this distribution.
8   //
9   //      The Eclipse Public License is available at
10  //      http://www.eclipse.org/legal/epl-v10.html
11  //
12  //      The Apache License v2.0 is available at
13  //      http://www.opensource.org/licenses/apache2.0.php
14  //
15  //  You may elect to redistribute this code under either of these licenses.
16  //  ========================================================================
17  //
18  
19  package org.eclipse.jetty.util;
20  
21  import java.nio.charset.Charset;
22  
23  
24  
25  /* ------------------------------------------------------------ */
26  /** URI Holder.
27   * This class assists with the decoding and encoding of HTTP URIs.
28   * It differs from the java.net.URL class as it does not provide
29   * communications ability, but it does assist with query string
30   * formatting.
31   * <P>UTF-8 encoding is used by default for % encoded characters. This
32   * may be overridden with the org.eclipse.jetty.util.URI.charset system property.
33   * @see UrlEncoded
34   * 
35   */
36  public class URIUtil
37      implements Cloneable
38  {
/** The URI path segment separator. */
public static final String SLASH="/";
/** The "http" scheme name. */
public static final String HTTP="http";
/** The "http" scheme name followed by the ':' delimiter. */
public static final String HTTP_COLON="http:";
/** The "https" scheme name. */
public static final String HTTPS="https";
/** The "https" scheme name followed by the ':' delimiter. */
public static final String HTTPS_COLON="https:";

/**
 * Charset used for % encoded characters.
 * Taken from the org.eclipse.jetty.util.URI.charset system property, falling back to
 * StringUtil.__UTF8 when the property is not set
 * (UTF-8 as per http://www.w3.org/TR/html40/appendix/notes.html#non-ascii-chars).
 */
public static final Charset __CHARSET;
static
{
    String charsetName=System.getProperty("org.eclipse.jetty.util.URI.charset",StringUtil.__UTF8);
    __CHARSET=Charset.forName(charsetName);
}

/** Static utility holder: never instantiated. */
private URIUtil()
{
}
50      
51      /* ------------------------------------------------------------ */
52      /** Encode a URI path.
53       * This is the same encoding offered by URLEncoder, except that
54       * the '/' character is not encoded.
55       * @param path The path the encode
56       * @return The encoded path
57       */
58      public static String encodePath(String path)
59      {
60          if (path==null || path.length()==0)
61              return path;
62          
63          StringBuilder buf = encodePath(null,path);
64          return buf==null?path:buf.toString();
65      }
66          
67      /* ------------------------------------------------------------ */
68      /** Encode a URI path.
69       * @param path The path the encode
70       * @param buf StringBuilder to encode path into (or null)
71       * @return The StringBuilder or null if no substitutions required.
72       */
73      public static StringBuilder encodePath(StringBuilder buf, String path)
74      {
75          byte[] bytes=null;
76          if (buf==null)
77          {
78          loop:
79              for (int i=0;i<path.length();i++)
80              {
81                  char c=path.charAt(i);
82                  switch(c)
83                  {
84                      case '%':
85                      case '?':
86                      case ';':
87                      case '#':
88                      case '\'':
89                      case '"':
90                      case '<':
91                      case '>':
92                      case ' ':
93                          buf=new StringBuilder(path.length()*2);
94                          break loop;
95                      default:
96                          if (c>127)
97                          {
98                              bytes=path.getBytes(URIUtil.__CHARSET);
99                              buf=new StringBuilder(path.length()*2);
100                             break loop;
101                         }
102                        
103                 }
104             }
105             if (buf==null)
106                 return null;
107         }
108         
109         synchronized(buf)
110         {
111             if (bytes!=null)
112             {
113                 for (int i=0;i<bytes.length;i++)
114                 {
115                     byte c=bytes[i];       
116                     switch(c)
117                     {
118                       case '%':
119                           buf.append("%25");
120                           continue;
121                       case '?':
122                           buf.append("%3F");
123                           continue;
124                       case ';':
125                           buf.append("%3B");
126                           continue;
127                       case '#':
128                           buf.append("%23");
129                           continue;
130                       case '"':
131                           buf.append("%22");
132                           continue;
133                       case '\'':
134                           buf.append("%27");
135                           continue;
136                       case '<':
137                           buf.append("%3C");
138                           continue;
139                       case '>':
140                           buf.append("%3E");
141                           continue;
142                       case ' ':
143                           buf.append("%20");
144                           continue;
145                       default:
146                           if (c<0)
147                           {
148                               buf.append('%');
149                               TypeUtil.toHex(c,buf);
150                           }
151                           else
152                               buf.append((char)c);
153                           continue;
154                     }
155                 }
156                 
157             }
158             else
159             {
160                 for (int i=0;i<path.length();i++)
161                 {
162                     char c=path.charAt(i);       
163                     switch(c)
164                     {
165                         case '%':
166                             buf.append("%25");
167                             continue;
168                         case '?':
169                             buf.append("%3F");
170                             continue;
171                         case ';':
172                             buf.append("%3B");
173                             continue;
174                         case '#':
175                             buf.append("%23");
176                             continue;
177                         case '"':
178                             buf.append("%22");
179                             continue;
180                         case '\'':
181                             buf.append("%27");
182                             continue;
183                         case '<':
184                             buf.append("%3C");
185                             continue;
186                         case '>':
187                             buf.append("%3E");
188                             continue;
189                         case ' ':
190                             buf.append("%20");
191                             continue;
192                         default:
193                             buf.append(c);
194                             continue;
195                     }
196                 }
197             }
198         }
199 
200         return buf;
201     }
202     
203     /* ------------------------------------------------------------ */
204     /** Encode a URI path.
205      * @param path The path the encode
206      * @param buf StringBuilder to encode path into (or null)
207      * @param encode String of characters to encode. % is always encoded.
208      * @return The StringBuilder or null if no substitutions required.
209      */
210     public static StringBuilder encodeString(StringBuilder buf,
211                                              String path,
212                                              String encode)
213     {
214         if (buf==null)
215         {
216         loop:
217             for (int i=0;i<path.length();i++)
218             {
219                 char c=path.charAt(i);
220                 if (c=='%' || encode.indexOf(c)>=0)
221                 {    
222                     buf=new StringBuilder(path.length()<<1);
223                     break loop;
224                 }
225             }
226             if (buf==null)
227                 return null;
228         }
229         
230         synchronized(buf)
231         {
232             for (int i=0;i<path.length();i++)
233             {
234                 char c=path.charAt(i);
235                 if (c=='%' || encode.indexOf(c)>=0)
236                 {
237                     buf.append('%');
238                     StringUtil.append(buf,(byte)(0xff&c),16);
239                 }
240                 else
241                     buf.append(c);
242             }
243         }
244 
245         return buf;
246     }
247     
248     /* ------------------------------------------------------------ */
249     /* Decode a URI path and strip parameters
250      * @param path The path the encode
251      * @param buf StringBuilder to encode path into
252      */
253     public static String decodePath(String path)
254     {
255         if (path==null)
256             return null;
257         // Array to hold all converted characters
258         char[] chars=null;
259         int n=0;
260         // Array to hold a sequence of %encodings
261         byte[] bytes=null;
262         int b=0;
263         
264         int len=path.length();
265         
266         for (int i=0;i<len;i++)
267         {
268             char c = path.charAt(i);
269 
270             if (c=='%' && (i+2)<len)
271             {
272                 if (chars==null)
273                 {
274                     chars=new char[len];
275                     bytes=new byte[len];
276                     path.getChars(0,i,chars,0);
277                 }
278                 bytes[b++]=(byte)(0xff&TypeUtil.parseInt(path,i+1,2,16));
279                 i+=2;
280                 continue;
281             }
282             else if (c==';')
283             {
284                 if (chars==null)
285                 {
286                     chars=new char[len];
287                     path.getChars(0,i,chars,0);
288                     n=i;
289                 }
290                 break;
291             }
292             else if (bytes==null)
293             {
294                 n++;
295                 continue;
296             }
297             
298             // Do we have some bytes to convert?
299             if (b>0)
300             {
301                 String s=new String(bytes,0,b,__CHARSET);
302                 s.getChars(0,s.length(),chars,n);
303                 n+=s.length();
304                 b=0;
305             }
306             
307             chars[n++]=c;
308         }
309 
310         if (chars==null)
311             return path;
312 
313         // if we have a remaining sequence of bytes
314         if (b>0)
315         {
316             String s=new String(bytes,0,b,__CHARSET);
317             s.getChars(0,s.length(),chars,n);
318             n+=s.length();
319         }
320         
321         return new String(chars,0,n);
322     }
323     
324     /* ------------------------------------------------------------ */
325     /* Decode a URI path and strip parameters.
326      * @param path The path the encode
327      * @param buf StringBuilder to encode path into
328      */
329     public static String decodePath(byte[] buf, int offset, int length)
330     {
331         byte[] bytes=null;
332         int n=0;
333         
334         for (int i=0;i<length;i++)
335         {
336             byte b = buf[i + offset];
337             
338             if (b=='%' && (i+2)<length)
339             {
340                 b=(byte)(0xff&TypeUtil.parseInt(buf,i+offset+1,2,16));
341                 i+=2;
342             }
343             else if (b==';')
344             {
345                 length=i;
346                 break;
347             }
348             else if (bytes==null)
349             {
350                 n++;
351                 continue;
352             }
353             
354             if (bytes==null)
355             {
356                 bytes=new byte[length];
357                 for (int j=0;j<n;j++)
358                     bytes[j]=buf[j + offset];
359             }
360             
361             bytes[n++]=b;
362         }
363 
364         if (bytes==null)
365             return new String(buf,offset,length,__CHARSET);
366         return new String(bytes,0,n,__CHARSET);
367     }
368 
369     
370     /* ------------------------------------------------------------ */
371     /** Add two URI path segments.
372      * Handles null and empty paths, path and query params (eg ?a=b or
373      * ;JSESSIONID=xxx) and avoids duplicate '/'
374      * @param p1 URI path segment (should be encoded)
375      * @param p2 URI path segment (should be encoded)
376      * @return Legally combined path segments.
377      */
378     public static String addPaths(String p1, String p2)
379     {
380         if (p1==null || p1.length()==0)
381         {
382             if (p1!=null && p2==null)
383                 return p1;
384             return p2;
385         }
386         if (p2==null || p2.length()==0)
387             return p1;
388         
389         int split=p1.indexOf(';');
390         if (split<0)
391             split=p1.indexOf('?');
392         if (split==0)
393             return p2+p1;
394         if (split<0)
395             split=p1.length();
396 
397         StringBuilder buf = new StringBuilder(p1.length()+p2.length()+2);
398         buf.append(p1);
399         
400         if (buf.charAt(split-1)=='/')
401         {
402             if (p2.startsWith(URIUtil.SLASH))
403             {
404                 buf.deleteCharAt(split-1);
405                 buf.insert(split-1,p2);
406             }
407             else
408                 buf.insert(split,p2);
409         }
410         else
411         {
412             if (p2.startsWith(URIUtil.SLASH))
413                 buf.insert(split,p2);
414             else
415             {
416                 buf.insert(split,'/');
417                 buf.insert(split+1,p2);
418             }
419         }
420 
421         return buf.toString();
422     }
423     
424     /* ------------------------------------------------------------ */
425     /** Return the parent Path.
426      * Treat a URI like a directory path and return the parent directory.
427      */
428     public static String parentPath(String p)
429     {
430         if (p==null || URIUtil.SLASH.equals(p))
431             return null;
432         int slash=p.lastIndexOf('/',p.length()-2);
433         if (slash>=0)
434             return p.substring(0,slash+1);
435         return null;
436     }
437     
438     /* ------------------------------------------------------------ */
439     /** Convert a path to a cananonical form.
440      * All instances of "." and ".." are factored out.  Null is returned
441      * if the path tries to .. above its root.
442      * @param path 
443      * @return path or null.
444      */
445     public static String canonicalPath(String path)
446     {
447         if (path==null || path.length()==0)
448             return path;
449 
450         int end=path.length();
451         int start = path.lastIndexOf('/', end);
452 
453     search:
454         while (end>0)
455         {
456             switch(end-start)
457             {
458               case 2: // possible single dot
459                   if (path.charAt(start+1)!='.')
460                       break;
461                   break search;
462               case 3: // possible double dot
463                   if (path.charAt(start+1)!='.' || path.charAt(start+2)!='.')
464                       break;
465                   break search;
466             }
467             
468             end=start;
469             start=path.lastIndexOf('/',end-1);
470         }
471 
472         // If we have checked the entire string
473         if (start>=end)
474             return path;
475         
476         StringBuilder buf = new StringBuilder(path);
477         int delStart=-1;
478         int delEnd=-1;
479         int skip=0;
480         
481         while (end>0)
482         {
483             switch(end-start)
484             {       
485               case 2: // possible single dot
486                   if (buf.charAt(start+1)!='.')
487                   {
488                       if (skip>0 && --skip==0)
489                       {   
490                           delStart=start>=0?start:0;
491                           if(delStart>0 && delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
492                               delStart++;
493                       }
494                       break;
495                   }
496                   
497                   if(start<0 && buf.length()>2 && buf.charAt(1)=='/' && buf.charAt(2)=='/')
498                       break;
499                   
500                   if(delEnd<0)
501                       delEnd=end;
502                   delStart=start;
503                   if (delStart<0 || delStart==0&&buf.charAt(delStart)=='/')
504                   {
505                       delStart++;
506                       if (delEnd<buf.length() && buf.charAt(delEnd)=='/')
507                           delEnd++;
508                       break;
509                   }
510                   if (end==buf.length())
511                       delStart++;
512                   
513                   end=start--;
514                   while (start>=0 && buf.charAt(start)!='/')
515                       start--;
516                   continue;
517                   
518               case 3: // possible double dot
519                   if (buf.charAt(start+1)!='.' || buf.charAt(start+2)!='.')
520                   {
521                       if (skip>0 && --skip==0)
522                       {   delStart=start>=0?start:0;
523                           if(delStart>0 && delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
524                               delStart++;
525                       }
526                       break;
527                   }
528                   
529                   delStart=start;
530                   if (delEnd<0)
531                       delEnd=end;
532 
533                   skip++;
534                   end=start--;
535                   while (start>=0 && buf.charAt(start)!='/')
536                       start--;
537                   continue;
538 
539               default:
540                   if (skip>0 && --skip==0)
541                   {
542                       delStart=start>=0?start:0;
543                       if(delEnd==buf.length() && buf.charAt(delEnd-1)=='.')
544                           delStart++;
545                   }
546             }     
547             
548             // Do the delete
549             if (skip<=0 && delStart>=0 && delEnd>=delStart)
550             {  
551                 buf.delete(delStart,delEnd);
552                 delStart=delEnd=-1;
553                 if (skip>0)
554                     delEnd=end;
555             }
556             
557             end=start--;
558             while (start>=0 && buf.charAt(start)!='/')
559                 start--;
560         }      
561 
562         // Too many ..
563         if (skip>0)
564             return null;
565         
566         // Do the delete
567         if (delEnd>=0)
568             buf.delete(delStart,delEnd);
569 
570         return buf.toString();
571     }
572 
573     /* ------------------------------------------------------------ */
574     /** Convert a path to a compact form.
575      * All instances of "//" and "///" etc. are factored out to single "/" 
576      * @param path 
577      * @return path
578      */
579     public static String compactPath(String path)
580     {
581         if (path==null || path.length()==0)
582             return path;
583 
584         int state=0;
585         int end=path.length();
586         int i=0;
587         
588         loop:
589         while (i<end)
590         {
591             char c=path.charAt(i);
592             switch(c)
593             {
594                 case '?':
595                     return path;
596                 case '/':
597                     state++;
598                     if (state==2)
599                         break loop;
600                     break;
601                 default:
602                     state=0;
603             }
604             i++;
605         }
606         
607         if (state<2)
608             return path;
609         
610         StringBuffer buf = new StringBuffer(path.length());
611         buf.append(path,0,i);
612         
613         loop2:
614         while (i<end)
615         {
616             char c=path.charAt(i);
617             switch(c)
618             {
619                 case '?':
620                     buf.append(path,i,end);
621                     break loop2;
622                 case '/':
623                     if (state++==0)
624                         buf.append(c);
625                     break;
626                 default:
627                     state=0;
628                     buf.append(c);
629             }
630             i++;
631         }
632         
633         return buf.toString();
634     }
635 
636     /* ------------------------------------------------------------ */
637     /** 
638      * @param uri URI
639      * @return True if the uri has a scheme
640      */
641     public static boolean hasScheme(String uri)
642     {
643         for (int i=0;i<uri.length();i++)
644         {
645             char c=uri.charAt(i);
646             if (c==':')
647                 return true;
648             if (!(c>='a'&&c<='z' ||
649                   c>='A'&&c<='Z' ||
650                   (i>0 &&(c>='0'&&c<='9' ||
651                           c=='.' ||
652                           c=='+' ||
653                           c=='-'))
654                   ))
655                 break;
656         }
657         return false;
658     }
659     
660 }
661 
662 
663